In [1]:
import numpy as np
import pandas as pd
In [2]:
%%writefile 1.txt
This is a story about cats
our feline pets
Cats are furry animals
In [3]:
%%writefile 2.txt
This story is about surfing
Catching waves is fun
Surfing is a popular water sport
In [6]:
# Build the word -> index vocabulary from the first document.
# Indices start at 1: slot 0 of each count vector built later holds the file name.
vocab, i = {}, 1
with open('1.txt') as handle:
    tokens = handle.read().lower().split()
for token in tokens:
    # Only the first occurrence of a word receives an index.
    if token not in vocab:
        vocab[token] = i
        i += 1
print(vocab)
In [7]:
# Extend the shared vocabulary with the words of the second document.
# The next free index is derived from the vocabulary itself rather than from
# the leaked global counter `i`, so re-running this cell after `i` has been
# clobbered elsewhere (it is a common scratch name) cannot assign duplicate
# indices. Assumes vocab's indices are the consecutive integers
# 1..len(vocab), which is what the first vocabulary cell produces.
with open('2.txt') as handle:
    tokens = handle.read().lower().split()
for token in tokens:
    # Words already known from the first document keep their index.
    if token not in vocab:
        vocab[token] = len(vocab) + 1
# Keep the legacy counter in sync for any later cell that still reads it.
i = len(vocab) + 1
print(vocab)
In [11]:
# One count vector per document: slot 0 carries the file name, followed by one
# zero-initialised counter per vocabulary entry (vocabulary indices start at 1).
file_one = ['1.txt'] + [0 for _ in vocab]
file_two = ['2.txt'] + [0 for _ in vocab]
print(file_one, file_two, sep='\n')
In [15]:
# Count how often each vocabulary word occurs in 1.txt.
# The count slots are zeroed first so that re-running this cell yields the
# same vector instead of adding the counts a second time. Slot 0 (the file
# name) is left untouched; the slice assignment also resizes the vector if
# the vocabulary has changed since it was created.
file_one[1:] = [0] * len(vocab)
with open('1.txt') as f:
    for word in f.read().lower().split():
        # vocab maps each word to its slot in the vector.
        file_one[vocab[word]] += 1
file_one
Out[15]:
In [16]:
# Count how often each vocabulary word occurs in 2.txt.
# The count slots are zeroed first so that re-running this cell yields the
# same vector instead of adding the counts a second time. Slot 0 (the file
# name) is left untouched; the slice assignment also resizes the vector if
# the vocabulary has changed since it was created.
file_two[1:] = [0] * len(vocab)
with open('2.txt') as f:
    for word in f.read().lower().split():
        # vocab maps each word to its slot in the vector.
        file_two[vocab[word]] += 1
file_two
Out[16]:
In [17]:
# Show both document vectors, one per line, for side-by-side comparison.
print(file_one, file_two, sep='\n')
In [ ]: